/************************************************************************* 
Program name: clean_tradeline03.sas;
Objective: This program 
   * constructs the mop variable (a cleaned version of currmop)
   * sets negative date values to missing
   * creates the pay3059, pay6080, and pay90up variables, which indicate how many months an individual was 
     in these delinquency categories
   * creates indicator variables from the dispute codes that indicate whether a derogatory is associated with
     an account, a bankruptcy, a judgment, etc
**************************************************************************/

OPTIONS LINESIZE=79;

libname local '/data/';
libname out '/data';

%include "/data/tu_formats.sas";

* To be consistent with the earlier years, we put the tradeline files back into one file;
* Bob split them for space reasons but our data set is smaller;
data combine;
  set local.trader03 local.tradem03;
run;

/* There are 12,401 observations in this dataset with missing teditseq. None of 
   these observations have any information other than person id. I merged these 
   person ids back with the rolled-up score dataset to find out what is going on.
   For 4300 of the observations, the people did not have any reported trades in 
   the roll-up dataset, but did have public records or inquries (which explains 
   why we have a record of them at all). The other 8101 observations did have 
   trades reported.  I counted their trades in the tradeline dataset to make sure 
   this number matched the rolled-up number from the score dataset.  In all cases,
   it did.  This means that for these people, we have extra blank observations, 
   but at least all of their trades are accounted for, and we drop them. 
*/
data combine;
     set combine;
     if teditseq ne "";
run;


* The 2003 and 2004 programs have KOB on them.  I delete KOB to be consistent with the other years.;
data out.cleantrade03;
   set combine (rename=(oldmop=currmop maxdelvl=c3 nummnths=c4 past3059=c5 past6089=c6 past90up=c7 oldbal=acctbal) drop=kob);

   length kob $1.;
   length maxdelvl mopmax mop 3.;
   length nummnths past3059 past6089 past90up paymnths paymop1 pay3059 pay6089 pay90119 pay120up paymax 3.;
   length maxdelam dtopen l 8.;
   length maj dis non clo col for rep crg jug oth ban pai mop 3.;

   /* Consolidates all the "8" (repossession) and "9" (bad debt / collection) values*/
   /* Recodes "UR" (unrated) and "UC" (unclassified) with numeric values*/
   if currmop in ('00','01','02','03','04','05','06','07','08','09')
      then mop=input(currmop,best12.);
   else if substr(currmop,1,1) eq '8' then mop=8;
   else if substr(currmop,1,1) eq '9' then mop=9;
   else if currmop eq 'UR' then mop=10;
   else if currmop eq 'UC' then mop=11;

   kob=substr(subcdt,1,1);

   keep  acctype amtpast assoccd collfld crdtlim acctbal currmop
   dispcd dtclose dtopen dtpdout dtrept dtveri ecoa hicredit
   loantypt maxdelam maxdeldt maxdelvl nummnths past3059 past6089
   past90up paypat subcdt terms mop dup editseq 
   kob mopmax: paymnths: paymax: subname paymop1: pay3059: pay6089: pay90119: pay120up:
   maj dis non clo col for rep crg jug oth ban pai teditseq person paypat:;

   /* fix funny value of terms*/
   if terms eq 'ZZZZZZZZ' then terms = .;

   /* This code dates from the days when mainframe storage was expensive and programmers needed to be
  parsimonious with their variable lengths.  Our dataset is smaller than Bob's so we don't really need
  this code, although I keep it.*/
  maxdelvl=c3;
  nummnths=c4;
  past3059=c5;
  past6089=c6;
  past90up=c7;

  /* Sets negative date values to missing*/
  /* These five data recodes were originally written as (for example) IF DTOPEN LE 0 LE 0 THEN DTOPEN=.  Getting rid
  of the second LE 0 does not affect the number of dates set to missing so I have removed it. KMP 6/07*/

  if dtopen le 0 then dtopen=.;
  if dtclose le 0 then dtclose=.;
  if dtpdout le 0 then dtpdout=.;
  if dtrept  le 0 then dtrept=.;
  if dtveri  le 0 then dtveri=.;

  if maxdelam le 0 then maxdelam=.;
  if maxdeldt le 0 then maxdeldt=.;
  if maxdelvl le 0 then maxdelvl=.;

  /* Creates new variables that indicate how many times a payment was 30 to 59 days late, 60 to 89 days late, etc
  over the past 48 months.  These variables are based on the "paypat" variable, which contains 48 months
  of payment history.  These variables are only created for consumers with non-missing paypats.*/

  if substr(paypat,1,1) ne ' ' then do;
     pay3059=0;
     pay6089=0;
     pay90119=0;
     pay120up=0;
     paymnths=0;
     paymop1=0;
     mopmax=0;
     paymax=0;

  do k=1 to 48;
     if substr(paypat,k,1) eq ' ' then go to cc;
     paymnths=paymnths+1;
     if substr(paypat,k,1) eq 'X' then go to aa;
     paymax=k;
     l=input(substr(paypat,k,1), best12.);
   	if l gt mopmax then mopmax=l;
   	if l eq 1 then paymop1=paymop1+1;
   	if l eq 2 then pay3059=pay3059+1;
   	if l eq 3 then pay6089=pay6089+1;
     	if l eq 4 then pay90119=pay90119+1;
	if l eq 5 then pay120up = pay120up+1;
     aa: end;
     cc: end;

  if mopmax le 0 then do;
     pay3059=.;
     pay6089=.;
     pay90119=.;
     pay120up=.;
     paymnths=.;
     mopmax=.;
     paymop1=.;
     paymax=.;
  end;


	/*create a variable called paypat_24 that is substring of paypat.*/
/*  Paypat_24 is a string of payment history over the last 24 months (i.e. since the last survey). */
/*Because the most recent date of paypat depends on the date of the MOP variable, the length of paypat_24 depends on the date of the MOP variable.*/

	format mopdate yymmn6.;
	if mop ge 2 then do;
	   if dtclose gt 0 then mopdate = input(put(dtclose,6.),yymmn6.);
	   if dtclose le 0 and dtpdout gt 0 then mopdate = input(put(dtpdout,6.),yymmn6.);
	   if dtclose le 0 and dtpdout le 0 then mopdate = input(put(dtveri,6.),yymmn6.);
	end;
	if mop lt 2 then do;
	   if dtpdout gt 0 then mopdate = input(put(dtpdout,6.),yymmn6.);
	   if dtpdout le 0 and dtclose gt 0 then mopdate = input(put(dtclose,6.),yymmn6.);
	   if dtpdout le 0 and dtclose le 0 then mopdate = input(put(dtveri,6.),yymmn6.);
	end;
	lenpaypat = intck('month','01jun2001'd,mopdate)-1;
	if lenpaypat ge 1 and substr(paypat,1,1) ne ' ' then paypat_24 = substr(paypat,1,lenpaypat);

  	if substr(paypat_24,1,1) ne ' ' then do;
     	   pay3059_24=0;
     	   pay6089_24=0;
     	   pay90119_24=0;
     	   pay120up_24=0;
     	   paymnths_24=0;
     	   paymop1_24=0;
	   mopmax_24=0;
     	   paymax_24=0;

     	do k=1 to 24;
     	   if substr(paypat_24,k,1) eq ' ' then go to dd;
     	   paymnths_24=paymnths_24+1;
     	   if substr(paypat_24,k,1) eq 'X' then go to ff;
     	   paymax_24=k;
     	   l=input(substr(paypat_24,k,1), best12.);
     	   if l gt mopmax_24 then mopmax_24=l;			
     	   if l eq 1 then paymop1_24=paymop1_24+1;
     	   if l eq 2 then pay3059_24=pay3059_24+1;
     	   if l eq 3 then pay6089_24=pay6089_24+1;
     	   if l eq 4 then pay90119_24=pay90119_24+1;
	   if l eq 5 then pay120up_24 = pay120up_24+1;
     	   dd: end;
     	   ff: end;

    	   if mopmax_24 le 0 then do;	
     	      pay3059_24=.;
	      pay6089_24=.;
	      pay90119_24=.;
	      pay120up_24=.;
	      paymnths_24=.;
	      mopmax_24=.;
	      paymop1_24=.;
	      paymax_24=.;
           end;


  /************************DO DISPUTE RECODES*****************************/
  IF DISPCD NE '   ' THEN DO;
     ARRAY THR MAJ DIS NON CLO COL FOR REP CRG JUG OTH BAN PAI;
     DO OVER THR;
     	THR=0;
     END;

     /* Major derogatory associated with the account*/
     IF DISPCD IN ('AM ','AS ','BKL','CCA','RLD','CLA','PRL','JUD','MOV',
     	'RLP','RPO','RRE','RVD','RVN','RVP','RVR','WEP','CTS','FCL','SET',
     	'PPL','STU','DLU','CCG','DLF','FFB','FPD','FRD','GOF','GMD','RDP',
  	'LBR','PCL','ETD','ETA','SRG','SLD','FPI','BDC','BDI','BDM','CAG',
  	'CCC','COL','CPP','CSF','GVC','RPF','RVC','RVE','PBD','POA','FPS',
  	'CHK','AOA','PTD','VPD','FHA','CPM','REA','RAC','PBB','MCC','PPA',
  	'SBB','ER ','PPD','RPD','SLP','PWG','PDD','CLS','SK ','PLP','ERX',
  	'CTX','CBL','CBR','PNR','PFC','TTR','CBT','BKW','JUG','FTS','FTB',
  	'ETS','ETB','ETI','IRE','IRO','IRB','CCD','CDD','CED','BKD','BCD',
  	'BKC','BRR','BRC','CDL','CDC','CRD','CRL','CDR','CRC','CRR','CRV',
  	'CDT','CTC','CRT','CTV','WPD','WPC','WCD','WRC','WRR') THEN MAJ=1;

      /* Account information is in dispute*/
      IF DISPCD IN ('AA ','AB ','AD ','AE ','AF ','AG ','AH ','AI ','AJ ',
      	 'AK ','AL ','AN ','AP ','AR ','AT ','RS ','DP ','DRP','CD ','DIS',
  	 'DRS','LBR','CNR','DRC','DRX','AJX','DGX','AID','CAD','CBD','CFD',
  	 'BKD','BCD','BRR','BRC','CDL','CDC','CRD','CRL','CDR','CRC','CRR',
  	 'CRV','CTD','CTC','CRT','CTV','WPD','WCD','WRC','WRR','DRG','DGR')
  	 THEN DIS=1;

     /* Nonderogatory?*/
     IF DISPCD IN ('AA ','AB ','AD ','AE ','AF ','AG ','AH ','AI ','AJ ',
     	'AK ','AL ','AN ','AP ','AT ','DP ','DRP','CD ','DIS','AJP','STL',
  	'TRF','RFN','CLO','INS','ASM','CBC','CBG','DRS','ELN','FHI','GEA',
  	'GFS','GGG','GOP','LEA','PRD','SDL','SGL','NIR','TRL','UDL','VAL',
  	'VAR','DEC','SLA','SPD','PPC','VCC','VPI','HEQ','INC','ICP','AOA',
  	'CNR','CSA','OFS','ET ','AMD','AC ','BTA','IA ','DRG','IB ','NIB',
  	'SPL','SIL','PAL','CTR','AAP','CLC','DRC','INP','AFR','DM ','CLB',
  	'ACR','ACT','DRX','DDR','AJX','NIX','CWC','AND','NPA','INA','PDE',
  	'CPB','PDI','AID','FTO','ETO','PLL','CAD','CBD','CFD','UGL')
  	THEN NON=1;

     /* Account closed*/
     IF DISPCD IN ('STL','TRF','RFN','CLO','ASM','CBC','CBG','TRL','DEC',
     	'ETD','ETA','CCC','OFS','ET ','AC ','BTA','PAL','CTR','AAP','CLC',
  	'ACR','ACT','FTO','ETO','IRO','PLL','CAD','CBD','CCD','CDD','CED',
  	'CFD','BCD','BKC','BRC','CDC','CRD','CRC','CRR','CTC','CRT','WPC',
  	'WCD','WRC') THEN CLO=1;

     /* Bankruptcy filing associated with the account*/
     IF DISPCD IN ('BKL','WEP','FFB','REA','RAC','PBB','SBB','CBL','CBR',
     	'CBT','BKW','CCD','CDD','CED','BKD','BCD','BKC','BRR','BRC','CDL',
	'CDC','CRD','CRL','CDR','CRC','CRR','CRV','CDT','CTC','CRT','CTV',
  	'WPD','WPC','WCD','WRC','WRR') THEN BAN=1;

     /* Collection associated with the account*/
     IF DISPCD IN ('CLA','RDP','PCL','CCC','COL','CPP','PBD','AOA','CPM',
     	'CWC') THEN COL=1;

     /* Account balance paid to zero*/
     IF DISPCD IN ('RLD','PPL','INS','RDP','PCL','PBD','CPM','ER ','PPD',
     	'INP','RPD','PDD','ERX','PFC') THEN PAI=1;

     /* Foreclosure*/
     IF DISPCD IN ('FCL','DLU','DLF','FPD','FRD','FPI','FPS','FHA')
     	THEN FOR=1;

      /* Repossession;*/
      IF DISPCD IN ('AM ','RLD','RLP','RPO','RRE','RVD','RVN','RVP','RVR',
      	 'GVC','RPF','RVC','RVE','RPD','PDD','PFC','IRE','IRO','IRB')
  	 THEN REP=1;

     /* Chargeoff associated with the account*/
     IF DISPCD IN ('PRL','SET','PPL','STU','ETD','ETA','SRG','SLD','BDC',
     	'BDI','BDM','CAG','ET ','SLP','SK ','PLP','TTR','FTB','ETB','ETI')
  	THEN CRG=1;

     /* Judgment associated with the account*/
     IF DISPCD IN ('AS ','JUD','JUG') THEN JUG=1;

     /* Other*/
     IF DISPCD IN ('DEC','DM ','CLB','NPA') THEN OTH=1;

   END;

   IF DTCLOSE LE 0 AND CLO EQ 1 THEN DTCLOSE=.;

run;

proc freq data = out.cleantrade03;
  tables mop kob pay3059 pay6089 pay90119 pay120up mopmax;
  format mop mop. kob $kob.;
run;

proc freq data = out.cleantrade03;
  tables MAJ DIS NON CLO COL FOR REP CRG JUG OTH BAN PAI;
run;

proc print data= out.cleantrade03 (obs=10);
run;
